## ##############################################################
## Use:               Reproduction Script -> Data analysis
##
## Paper:             Stages, Orders & Periods of Welfare State Development: 
##                    A Global Sequence Analysis of Social Policy Dynamics
##
##
## Author:            Fabian Besche-Truthe, Tobias Boeger, Johanna Fischer
##
## Content:           1. Prerequisites
##                    2. Create Event Intensity plots (for decades)
##                    3. Create and Plot Ranks of Adoptions 
##                    4. Create Welfare Profiles for every 5 years
##                    5. Create Sankey Plots of Sequences
##                    6. Start classic Sequence Analysis
##                    7. Cluster Analysis of Sequences
##                    8. Sequence Analysis Multistate Model (SAMM)
##
##                    
#################################################################



#############################################
######### 1. Preperation ####################
#############################################


library(tidyverse)
library(reshape2)
library(TraMineR)
library(countrycode)
library(rworldmap)
library(vtable)
library(networkD3)
library(htmlwidgets)
library(cluster)
library(NbClust)
library(factoextra)
library(RColorBrewer)

## open data
## Single citation can be found in the paper itself
df <- read_csv("sequences_data_reproduction.csv")

##########################################################################
######### 2. Create Event Intensity plots (Figure 1)  #################
##########################################################################
## create long version of df
df_long <- melt(df, id.vars = c("iso3", "year", "cow_code"))
unique(df_long$variable)
df_long$variable <- as.character(df_long$variable)
df_long$value <- as.numeric(df_long$value)


## recode values for decades
#i.e. 1885 == 1880-1889; 1895 == 1890-1899; 1995 == 1900-1909 .....

df_long_decades <- df_long

names(df_long_decades)
df_long_decades$value_decade <- NA
df_long_decades <- df_long_decades %>% 
  mutate(value_decade = case_when(
    value <= 1829 ~ "1880-1889",
    value >= 1830 & value <= 1839 ~ "1830-1839",
    value >= 1840 & value <= 1849 ~ "1840-1849",
    value >= 1850 & value <= 1859 ~ "1850-1859",
    value >= 1860 & value <= 1869 ~ "1860-1869",
    value >= 1870 & value <= 1879 ~ "1870-1879",
    value >= 1880 & value <= 1889 ~ "1880-1889",
    value >= 1890 & value <= 1899 ~ "1890-1899",
    value >= 1900 & value <= 1909 ~ "1900-1909",
    value >= 1910 & value <= 1919 ~ "1910-1919",
    value >= 1920 & value <= 1929 ~ "1920-1929",
    value >= 1930 & value <= 1939 ~ "1930-1939",
    value >= 1940 & value <= 1949 ~ "1940-1949",
    value >= 1950 & value <= 1959 ~ "1950-1959",
    value >= 1960 & value <= 1969 ~ "1960-1969",
    value >= 1970 & value <= 1979 ~ "1970-1979",
    value >= 1980 & value <= 1989 ~ "1980-1989",
    value >= 1990 & value <= 1999 ~ "1990-1999",
    value >= 2000 & value <= 2009 ~ "2000-2009",
    value >= 2010 & value <= 2019 ~ "2010-2019",
    value >= 2020 ~ "2020"
  ))

## sort countries for a better plot
df_long_sorted_decades <- df_long_decades
df_long_sorted_decades$iso3 <- reorder(df_long_sorted_decades$iso3, df_long_sorted_decades$value_decade, FUN = min)
df_long_sorted_decades$iso3 <- factor(df_long_sorted_decades$iso3, levels = rev(levels(df_long_sorted_decades$iso3)))

df_long_sorted_decades <- df_long_sorted_decades %>% filter(!is.na(value))


## summarize counts per decade and variable
event_intense_decade <- df_long_sorted_decades  

event_intense_decade <- event_intense_decade %>% 
  mutate(stub = case_when(
    is.na(value_decade) ~ 0,
    !is.na(value_decade) ~ 1
  )) 

head(event_intense_decade)
test <- event_intense_decade %>% filter(iso3 == "ITA")

## count adoptions per decade
event_intense_decade <- event_intense_decade %>% 
  filter(variable == "health_intro" |
           variable == "ltc_intro" |
           variable == "edu_intro" |
           variable == "workinj_intro" |
           variable == "pension_intro" |
           variable == "matleave_intro" |
           variable == "unemp_intro" |
           variable == "ecec_intro" ) %>% 
  select(!year) %>% distinct()

event_intense_decade <- event_intense_decade %>% group_by(variable, value_decade) %>% 
  summarise(n_adoptions = sum(stub, na.rm = TRUE))  %>% ungroup()

event_intense_decade <- event_intense_decade %>% filter(!is.na(value_decade))

event_intense_decade <- dcast(event_intense_decade, variable ~ value_decade, value.var = "n_adoptions")

event_intense_decade[is.na(event_intense_decade)] <- 0
event_intense_decade <- melt(event_intense_decade, id.vars = "variable")
colnames(event_intense_decade)[2:3] <- c("value_decade", "n_adoptions")

event_intense_decade <- event_intense_decade %>% filter(!value_decade == 2020)

## Make event intensity plot without stacked bars

event_intense_decade %>% filter(!value_decade == "1830-1839") %>% 
  filter(!value_decade == "1840-1849") %>% filter(!value_decade == "1850-1859") %>% 
  filter(!value_decade == "1860-1869") %>% filter(!value_decade == "1870-1879") %>% 
  ggplot(aes(x = value_decade,y = n_adoptions, fill = variable)) +
  geom_col(width= 2.5, position = position_dodge(0.9)) +
  scale_fill_brewer("Social Policy", palette = "Set3", labels = c("Early Childhood Education and Care", 
                                                                  "Education", "Health",
                                                                  "Long Term Care", "Maternity Leave",
                                                                  "Pension", "Unemployment",
                                                                  "Work Injury")) +
  ylab("Adoptions (count)") +
  xlab("Decade") +
  theme_bw() + theme(axis.text.x = element_text(face="italic", size = 8))



#ggsave("Besche-Truthe_Fig1.tiff", units="in", width=12, height=3, dpi=1000, compression = 'lzw')

#################################################################
######### 3. Create and Plot Ranks of Adoption (Fig. 2)  ########
#################################################################

### Rank social policy adoptions
## split df to run loop for every social policy
split_socpol <- split(df_long, df_long$variable)

## create target df
socpol_ranks <- as.data.frame(matrix(nrow=0,ncol=3))
colnames(socpol_ranks) <- c("iso3", "variable", "socpol_rank")
## create interims df
df_int <- data.frame()

## commence loop 

for (i in seq_along(split_socpol)) {
  
  # save single case
  df_int <- as.data.frame(split_socpol[[i]])
  
  df_int <- df_int %>% select(iso3, variable, value) %>% 
    filter(!is.na(value)) %>% distinct()
  #select relevant columns and delete non-adoption
  df_int <- df_int %>%
    mutate(socpol_rank = order(order(value, decreasing=FALSE)))
  
  df_int <- df_int %>% select(iso3, variable, socpol_rank)
  #join with inteded target df 
  socpol_ranks <- rbind(socpol_ranks, df_int)
}

head(socpol_ranks)
socpol_ranks <- dcast(socpol_ranks, socpol_rank ~ variable, value.var = "iso3")

### Create adoption sequence for countries without care for timing
### thus just the sequence in which single social policies were adopted

## split df to run loop on every single case
df_long_single <- df_long %>% filter(variable == "health_intro" |
                                       variable == "ltc_intro" |
                                       variable == "edu_intro" |
                                       variable == "workinj_intro" |
                                       variable == "pension_intro" |
                                       variable == "matleave_intro" |
                                       variable == "unemp_intro" |
                                       variable == "ecec_intro" )

split_iso <- split(df_long_single, df_long_single$iso3)

## create target df
adoption_sequence <- as.data.frame(matrix(nrow=0,ncol=3))
colnames(adoption_sequence) <- c("iso3", "variable", "adoption")
#create interims df
df_int <- data.frame()


# commence loop

for (i in seq_along(split_iso)) {
  
  # save single case
  df_int <- as.data.frame(split_iso[[i]])
  #select relevant columns and delete non-adoption
  df_int <- df_int %>% select(iso3, variable, value) %>% 
    filter(!is.na(value)) %>% distinct()
  #order in order of adoption
  df_int <- df_int[order(df_int$value),] 
  #create new variable to flag adoption
  df_int$adoption <- 1:nrow(df_int)
  df_int <- df_int %>% select(!value)
  #join with inteded target df 
  adoption_sequence <- rbind(adoption_sequence, df_int)
}

head(adoption_sequence)
adoption_sequence <- adoption_sequence %>% mutate(variable= ifelse(variable == "health_intro", "health", variable),
                                                  variable = ifelse(variable == "ltc_intro", "ltc", variable),
                                                  variable = ifelse(variable == "edu_intro", "edu", variable),
                                                  variable = ifelse(variable == "workinj_intro", "workinj", variable),
                                                  variable = ifelse(variable == "pension_intro", "pension", variable),
                                                  variable = ifelse(variable == "matleave_intro", "matleave", variable),
                                                  variable = ifelse(variable == "unemp_intro", "unemp", variable),
                                                  variable = ifelse(variable == "ecec_intro", "ecec", variable),
                                                  variable = ifelse(variable == "workrelated_intro", "workrelated", variable),
                                                  variable = ifelse(variable == "basicserv", "basicserv", variable),
                                                  variable = ifelse(variable == "care_intro", "care", variable)
)

## count number of adoption per rank
adoption_sequence$stub <- 1
adoption_sequence$adoption <- as.factor(adoption_sequence$adoption)

adoption_sequence_grouped <- adoption_sequence %>% 
  group_by(variable, adoption) %>% 
  summarise(count_ranks = sum(stub))

adoption_sequence_grouped$count_ranks[is.na(adoption_sequence_grouped$count_ranks)] <- 0

adoption_sequence_grouped <- adoption_sequence_grouped %>% mutate(adoption = as.factor(adoption)) 
adoption_sequence_grouped$adoption <- factor(adoption_sequence_grouped$adoption, levels=rev(levels(adoption_sequence_grouped$adoption)))


### make plot for adoption ranks
adoption_sequence_grouped %>% 
  ggplot(aes(x = count_ranks, y = adoption, fill = variable)) +
  geom_col() +
  scale_fill_brewer("Social Policy", palette = "Set3", labels = c("Early Childhood Education and Care", 
                                                                  "Education", "Health",
                                                                  "Long Term Care", "Maternity Leave",
                                                                  "Pension", "Unemployment",
                                                                  "Work Injury")) +
  guides(col = guide_legend(reverse = TRUE)) +
  ylab("Position of Adoption") +
  xlab("Count") + theme_bw() 


#ggsave("Besche-Truthe_Fig2.tiff", units="in", width=10, height=3.5, dpi=1000, compression = 'lzw')


#########################################################################
#########  4. Create Welfare Profiles for every 5 years #################
########################################################################

## Create quins of social policy profiles from 1880 - 2019
df[df == 9999] <- NA

intro_quin <- df %>% filter(year >= 1880 & year < 2020) %>% 
  select(iso3, year, workrelated_intro, edu_intro, health_intro, care_intro)


intro_quin$quin <- NA

intro_quin$quin[intro_quin$year >= 1880 &
                  intro_quin$year <= 1884] <- 1

intro_quin$quin[intro_quin$year >= 1885 &
                  intro_quin$year <= 1889] <- 2

intro_quin$quin[intro_quin$year >= 1890 &
                  intro_quin$year <= 1894] <- 3

intro_quin$quin[intro_quin$year >= 1895 &
                  intro_quin$year <= 1899] <- 4

intro_quin$quin[intro_quin$year >= 1900 &
                  intro_quin$year <= 1904] <- 5

intro_quin$quin[intro_quin$year >= 1905 &
                  intro_quin$year <= 1909] <- 6

intro_quin$quin[intro_quin$year >= 1910 &
                  intro_quin$year <= 1914] <- 7

intro_quin$quin[intro_quin$year >= 1915 &
                  intro_quin$year <= 1919] <- 8

intro_quin$quin[intro_quin$year >= 1920 &
                  intro_quin$year <= 1924] <- 9

intro_quin$quin[intro_quin$year >= 1925 &
                  intro_quin$year <= 1929] <- 10

intro_quin$quin[intro_quin$year >= 1930 &
                  intro_quin$year <= 1934] <- 11

intro_quin$quin[intro_quin$year >= 1935 &
                  intro_quin$year <= 1939] <- 12

intro_quin$quin[intro_quin$year >= 1940 &
                  intro_quin$year <= 1944] <- 13

intro_quin$quin[intro_quin$year >= 1945 &
                  intro_quin$year <= 1949] <- 14

intro_quin$quin[intro_quin$year >= 1950 &
                  intro_quin$year <= 1954] <- 15

intro_quin$quin[intro_quin$year >= 1955 &
                  intro_quin$year <= 1959] <- 16

intro_quin$quin[intro_quin$year >= 1960 &
                  intro_quin$year <= 1964] <- 17

intro_quin$quin[intro_quin$year >= 1965 &
                  intro_quin$year <= 1969] <- 18

intro_quin$quin[intro_quin$year >= 1970 &
                  intro_quin$year <= 1974] <- 19

intro_quin$quin[intro_quin$year >= 1975 &
                  intro_quin$year <= 1979] <- 20

intro_quin$quin[intro_quin$year >= 1980 &
                  intro_quin$year <= 1984] <- 21

intro_quin$quin[intro_quin$year >= 1985 &
                  intro_quin$year <= 1989] <- 22

intro_quin$quin[intro_quin$year >= 1990 &
                  intro_quin$year <= 1994] <- 23

intro_quin$quin[intro_quin$year >= 1995 &
                  intro_quin$year <= 1999] <- 24

intro_quin$quin[intro_quin$year >= 2000 &
                  intro_quin$year <= 2004] <- 25

intro_quin$quin[intro_quin$year >= 2005 &
                  intro_quin$year <= 2009] <- 26

intro_quin$quin[intro_quin$year >= 2010 &
                  intro_quin$year <= 2014] <- 27

intro_quin$quin[intro_quin$year >= 2015 &
                  intro_quin$year <= 2019] <- 28


## Create Welfare state profiles per quin

## 1st create binary var whether in place or not
head(intro_quin)
names(intro_quin)

intro_quin <- intro_quin %>% mutate(workrelated_intro = ifelse(workrelated_intro <= year, 1, NA),
                                    edu_intro = ifelse(edu_intro <= year, 1, NA),
                                    health_intro = ifelse(health_intro <= year, 1, NA),
                                    care_intro = ifelse(care_intro <= year, 1, NA))

head(intro_quin)
tail(intro_quin)

## 2nd fill possible NA per quin with fill
intro_quin <- intro_quin %>% group_by(iso3, quin) %>% 
  fill(workrelated_intro, edu_intro, health_intro, care_intro, .direction = "updown") %>% ungroup()

## 3rd fill remaining NAs with 0
intro_quin <- intro_quin %>% mutate(workrelated_intro = ifelse(is.na(workrelated_intro), 0, workrelated_intro),
                                    edu_intro = ifelse(is.na(edu_intro), 0, edu_intro),
                                    health_intro = ifelse(is.na(health_intro), 0, health_intro),
                                    care_intro = ifelse(is.na(care_intro), 0, care_intro))



## Code as Welfare State Profile 
welfare_profiles <- tidyr::crossing(workrelated = 0:1, 
                                    edu = 0:1,
                                    health = 0:1, 
                                    care = 0:1)

## assign name for welfare profile
welfare_profiles$row <- 1:nrow(welfare_profiles)
welfare_profiles$w_profile <- c("Z", 
                                "C", 
                                "H", 
                                "C_H", 
                                "E", 
                                "C_E",
                                "E_H",
                                "C_E_H",
                                "W",
                                "C_W",
                                "H_W",
                                "C_H_W",
                                "E_W",
                                "C_E_W",
                                "E_H_W",
                                "C_E_H_W")

welfare_profiles$row <- NULL


## saving the profiles for easier application
save_profiles <- welfare_profiles
save_profiles <- save_profiles%>% mutate(workrelated = ifelse(workrelated == 1, "workrelated", ""),
                                         edu = ifelse(edu == 1, "edu", ""),
                                         health = ifelse(health == 1, "health", ""),
                                         care = ifelse(care == 1, "care", ""))

#write_csv(save_profiles, "FOUR_aggregated_welfare_profiles.csv")


## assign welfare profiles to country-quins
head(welfare_profiles)

head(intro_quin)
intro_binary <- intro_quin %>% select(iso3, workrelated_intro, edu_intro,
                                      health_intro, care_intro, quin) %>% 
  distinct()

## create scheme variable
welfare_profiles$scheme_profiles <- paste0(welfare_profiles$workrelated, welfare_profiles$edu,
                                           welfare_profiles$health, welfare_profiles$care)

intro_binary$scheme_profiles <- paste0(intro_binary$workrelated_intro, intro_binary$edu_intro,
                                       intro_binary$health_intro, intro_binary$care_intro)

## extract only schemes
head(welfare_profiles)
welfare_profiles_singles <- welfare_profiles %>% select(scheme_profiles, w_profile)


## join profiles based on scheme
head(intro_binary)
head(welfare_profiles_singles)
intro_binary <- left_join(intro_binary, welfare_profiles_singles, by = "scheme_profiles")

## save single profiles per quin
head(intro_binary)

profiles_quin_long <- intro_binary %>% select(iso3, quin, w_profile)   

profiles_quin_long %>% select(w_profile) %>% distinct()

head(profiles_quin_long)

## make wide format
profiles_quin_wide <- profiles_quin_long  %>% distinct() %>% 
  dcast(iso3 ~ quin, value.var = "w_profile")


###############################################################
######### 5. Create Sankey Plots of Sequences (Fig. 3) ########
###############################################################

## create empty df for later use

ds_raw <- data.frame()
ds <- data.frame()
output_ds <- profiles_quin_long %>% select(iso3) %>% distinct()

## split df to run loop on every single decade 
split_quin <- split(profiles_quin_long, profiles_quin_long$quin)

w_profile_quin_0 <- profiles_quin_long %>% filter(quin == 0) 

## commence loop 

for (i in seq_along(split_quin)) {
  
  # save single year
  ds_raw <- as.data.frame(split_quin[[i]])
  
  # only keep welfare profile 
  ds <- ds_raw %>% select(iso3, w_profile) %>% distinct()
  colnames(ds)[2] <- paste0("w_profile_quin_", ds_raw$quin[1])
  
  #join to output dataframe
  output_ds <- left_join(output_ds, ds, by = "iso3")
  output_ds <- output_ds %>% distinct()
}

output_ds

## build dataset with complete linklist  
links_w_profiles <- output_ds %>% select(iso3, w_profile_quin_1, w_profile_quin_2)
colnames(links_w_profiles) <- c("iso3", "source", "target")

lorem <- output_ds %>% select(iso3, w_profile_quin_2, w_profile_quin_3)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_3, w_profile_quin_4)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_4, w_profile_quin_5)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_5, w_profile_quin_6)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_6, w_profile_quin_7)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_7, w_profile_quin_8)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_8, w_profile_quin_9)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_9, w_profile_quin_10)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_10, w_profile_quin_11)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_11, w_profile_quin_12)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_12, w_profile_quin_13)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_13, w_profile_quin_14)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_14, w_profile_quin_15)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_15, w_profile_quin_16)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_16, w_profile_quin_17)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)
zwischen_links <- links_w_profiles

lorem <- output_ds %>% select(iso3, w_profile_quin_17, w_profile_quin_18)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_18, w_profile_quin_19)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_19, w_profile_quin_20)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_20, w_profile_quin_21)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_21, w_profile_quin_22)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_22, w_profile_quin_23)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_23, w_profile_quin_24)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_24, w_profile_quin_25)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_25, w_profile_quin_26)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_26, w_profile_quin_27)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)

lorem <- output_ds %>% select(iso3, w_profile_quin_27, w_profile_quin_28)
colnames(lorem) <- c("iso3", "source", "target")
links_w_profiles <- rbind(links_w_profiles, lorem)


## create dummy variable to sum up with
links_w_profiles$dummy <- 1

## group by transitions
links_w_profiles <- links_w_profiles %>% group_by(source, target) %>% 
  summarise(sum_dummy = sum(dummy)) %>% ungroup()

## delete selfloops -> specifically W1->W1 which is 96
links_w_profiles <- links_w_profiles %>% filter(source != target)

## From these transitions create a node data frame: it lists every entities involved in the flow
nodes <- data.frame(
  name=c(as.character(links_w_profiles$source), 
         as.character(links_w_profiles$target)) %>% unique()
)

## With networkD3, connection must be provided using id, not using real name like in the links dataframe.. So we need to reformat it.
links_w_profiles$IDsource <- match(links_w_profiles$source, nodes$name)-1 
links_w_profiles$IDtarget <- match(links_w_profiles$target, nodes$name)-1

links_w_profiles <- as.data.frame(links_w_profiles)

links <- links_w_profiles %>% select(source, target, sum_dummy, IDsource, IDtarget)
colnames(links) <- c("source", "target", "value", "IDsource", "IDtarget")

## Make the Network
## For 13 colors now!
mycolors <- c("#006ddb",
              "#490092",
              "#000000",
              "#004949",
              "#920000",
              "#6db6ff",
              "#b66dff",
              "#ff6db6",
              "#009292",
              "#FF2E2E",
              "#db6d00",
              "#ffff6d",
              "#FFFFFF"
)

#nb.cols <- 8
#mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)
mycolors
#colors <- 'd3.scaleOrdinal(["#FFFFFF", "#004949","#ff6db6",
#              "#490092","#006ddb","#000000","#b66dff","#6db6ff",
#              "#920000","#009292","#db6d00","#24ff24","#ffff6d"])'


colors <- 'd3.scaleOrdinal(["#006ddb", "#490092", "#004949",
            "#6db6ff","#b66dff","#ff6db6","#009292","#FF2E2E",
          "#db6d00","#ffff6d","#FFFFFF","#000000", "#920000"])'


p <- sankeyNetwork(Links = links, Nodes = nodes,
                   Source = "IDsource", Target = "IDtarget",
                   Value = "value", NodeID = "name",
                   colourScale = colors,  fontSize = 20,
                   width = 1200,
                   height = 400,
                   sinksRight=FALSE)
p


## saved from Viewer


################################################################
######### 6. Sequence Analysis (Fig. 4)  #################
################################################################

## defining id variable
id <- profiles_quin_wide$iso3



## define color palette
mycolors <- c("#006ddb",
              "#490092",
              "#000000",
              "#004949",
              "#920000",
              "#6db6ff",
              "#b66dff",
              "#ff6db6",
              "#009292",
              "#FF2E2E",
              "#db6d00",
              "#ffff6d",
              "#FFFFFF"
)

colnames(profiles_quin_wide)[2:29] <- c("1880", "1885", "1890", "1895", "1900", 
                                        "1905", "1910", "1915", "1920", "1925", 
                                        "1930", "1935", "1940", "1945", "1950", 
                                        "1955", "1960", "1965", "1970", "1975", 
                                        "1980", "1985", "1990", "1995", "2000", 
                                        "2005", "2010", "2015")
## defining sequences
soc_pol_seq <- seqdef(profiles_quin_wide,2:29, id = id, cpal = mycolors)

summary(soc_pol_seq)


## ploting the sequences
dev.off()
dev.new()

seqIplot(soc_pol_seq, sortv = "from.start", with.legend = "right", border = NA, ylab = "Sequences", xlab = "Every 5 years from 1880", yaxis = F, main= "All Sequences (n=164)")


##saved from viewer

#######################################################################
######### 7. Cluster Analysis of Sequences  (Fig. 5 + Fig. 7) ########
#######################################################################

#create own substitution cost matrix where OM matching is similar to Abbott and DeViney (1992)
# "Following Abbott and DeViney (1992), we define substitution costs 
# using a ‘matching’ measure between the two consolidation profiles, 
# weighing the absence of consolidation as heavily as its presence. 
# For indels, we set the cost at one-half of the maximum substitution cost, 
# reflecting our assumption that the order of events is slightly more 
# important than their duration (see also Abbott and DeViney 1992 and 
# Studer and Ritschard 2016)."


#extract automatically created cost matrix to match order
automatic_matrix <- seqsubm(soc_pol_seq, method = "CONSTANT")

## load cost matrix based on matching system by Abbott and DeViney
own_dist_matrix <- read_delim("FOUR_own_om_distance_matrix.csv", 
                              delim = ";", escape_double = FALSE, trim_ws = TRUE)

own_dist_matrix$...1 <- NULL

own_dist_matrix <- as.matrix(own_dist_matrix)

rownames(own_dist_matrix) <- c(colnames(own_dist_matrix))

#check for sorting
automatic_matrix
own_dist_matrix

rm(automatic_matrix)

##computing distance matrix
om_dist_own <- seqdist(soc_pol_seq, method = "OM", sm = own_dist_matrix, with.missing = F, indel = max(own_dist_matrix)/2)


### Cluster
#I offer here multiple cluster solutions. However, in the published paper we decided on
#4 clusters using the Partitioning Around Medoids algorithm. I present the other solutions 
#to guarantee maximum transparency and a hint to possible other solutions.

## clustering based on the transition costs

## Elbow method
fviz_nbclust(om_dist_own, cluster::pam, method = "wss") +
  geom_vline(xintercept = 6, linetype = 2)+
  labs(subtitle = "Elbow method")

## Silhouette method
fviz_nbclust(om_dist_own, cluster::pam, method = "silhouette")+
  labs(subtitle = "Silhouette method")


clusterpam_own_6 <- pam(om_dist_own, k = 6, diss = TRUE)
clusterpam_6 <- factor(clusterpam_own_6$clustering, labels = c("1. Mid-century Welfare States  (n = 27)", 
                                                               "2. Latecomer Basic Services  (n = 38)", 
                                                               "3. Latecomer Welfaree States (n = 23)", 
                                                               "4. Early Education Welfare States  (n = 34)", 
                                                               "5. Pioneer Welfare State (n = 17)", 
                                                               "6. Decolonizing (proto) Welfare State (n = 25)"))


dev.off()
dev.new()
seqIplot(soc_pol_seq, sortv = "from.start", with.legend = "bottom", border = NA, group = clusterpam_6, 
         ylab="", yaxis = F) 

##saved from viewer


## representative sequence
seqrplot (soc_pol_seq, group = clusterpam_6, diss = om_dist_own, border = T, stats=FALSE,
          ylab="", xlab = "Every 5 years from 1880")

##saved from viewer


## write clusters to dataframe
cluster_socpol_combined <- cbind(profiles_quin_wide, clusterpam_6)
colnames(cluster_socpol_combined)[30] <- "OM_cluster_pam_6"



cluster_socpol_combined <- cluster_socpol_combined %>% 
  select(iso3, OM_cluster_pam_6) %>% distinct()

#write_csv(cluster_socpol_combined, "Cluster_socpol_combined.csv")

##############################################################
######### 8. Show Clusters on Worldmap  (Fig. 6) ########
##############################################################

## make worldmaps with clusters

cluster_socpol_combined$country <- countrycode(cluster_socpol_combined$iso3, "iso3c", "country.name")

cluster_map <- joinCountryData2Map(cluster_socpol_combined, joinCode = "ISO3", nameJoinColumn = "iso3",nameCountryColumn = "country")

summary(cluster_map)


#cluster_map_poly <-fortify(cluster_map) 
names(cluster_socpol_combined)


dev.off()
dev.new()

mapCountryData(cluster_map, nameColumnToPlot='OM_cluster_pam_6', catMethod='categorical', 
               mapTitle= "Clusters of Sequences of Social Policy Adoption",
               colourPalette=c(brewer.pal(n = 6, name = "Dark2")), 
               oceanCol='lightblue', 
               missingCountryCol='white')
##saved from viewer



